package com.doit.spark.day03

import com.doit.spark.day01.utils.SparkUtil
import org.apache.spark.rdd.RDD

/**
 * Word-count example that prints the reduced counts grouped by partition.
 *
 * @DATE 2022/1/5/22:54
 * @Author MDK
 * @Version 2021.2.2
 * */
object C06_WordCount02 {
  /**
   * Reads `data/word.txt`, counts the occurrences of every whitespace-separated token and
   * prints one `List[(String, Int)]` per partition of the resulting RDD.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtil.getSc
    try {
      val lines: RDD[String] = sc.textFile("data/word.txt")
      // split("\\s+") yields "" for blank lines and for lines that start with whitespace;
      // drop those tokens so the empty string is not counted as a word.
      val words: RDD[String] = lines.flatMap(_.split("\\s+")).filter(_.nonEmpty)
      // reduceByKey shuffles the pairs, so the counts are spread across the result partitions.
      val counts: RDD[(String, Int)] = words.map((_, 1)).reduceByKey(_ + _)
      // glom() turns each partition into a single array, making the per-partition grouping visible.
      counts.glom().map(_.toList).foreach(println)
    } finally {
      // NOTE(review): assumes SparkUtil.getSc returns a SparkContext owned by this job — confirm.
      sc.stop()
    }
  }
}
